// Copyright (C) Kumo inc. and its affiliates.
// Author: Jeff.li lijippy@163.com
// All rights reserved.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.
//


// Adapted from Apache Arrow.

#pragma once

#include <cstdint>
#include <memory>
#include <utility>

#include <pollux/dwio/parquet/writer/arrow/metadata.h>
#include <pollux/dwio/parquet/writer/arrow/platform.h>
#include <pollux/dwio/parquet/writer/arrow/properties.h>
#include <pollux/dwio/parquet/writer/arrow/schema.h>

namespace kumo::pollux::parquet::arrow {
    // Forward declaration only: this header refers to ColumnWriter solely
    // through pointers (see RowGroupWriter), so the full definition is not
    // required here.
    class ColumnWriter;

    // FIXME: copied from reader-internal.cc
    //
    // 4-byte magic sequences "PAR1" and "PARE".
    // NOTE(review): per the Parquet format, "PAR1" marks a regular file and
    // "PARE" a file whose footer is encrypted -- confirm against the format
    // specification.
    //
    // Declared `inline` (C++17) so that every translation unit including this
    // header shares a single definition instead of receiving its own
    // internal-linkage copy.
    inline constexpr uint8_t kParquetMagic[4] = {'P', 'A', 'R', '1'};
    inline constexpr uint8_t kParquetEMagic[4] = {'P', 'A', 'R', 'E'};

    /// \brief Writer for the columns of a single row group.
    ///
    /// Column writers are obtained either sequentially through NextColumn()
    /// (row groups created with ParquetFileWriter::AppendRowGroup) or by index
    /// through column() (row groups created with
    /// ParquetFileWriter::AppendBufferedRowGroup). The public methods mirror the
    /// Contents interface; the Contents instance passed to the constructor is
    /// owned by this object.
    class PARQUET_EXPORT RowGroupWriter {
    public:
        // Abstract interface 'Contents', declared here to aid dependency
        // injection and to make test fixtures easier to create. An implementation
        // of the Contents class is defined in the .cc file.
        struct Contents {
            virtual ~Contents() = default;

            /// \brief number of columns in this row group
            virtual int num_columns() const = 0;

            /// \brief number of rows in this row group
            virtual int64_t num_rows() const = 0;

            // to be used only with ParquetFileWriter::AppendRowGroup
            virtual ColumnWriter *NextColumn() = 0;

            // to be used only with ParquetFileWriter::AppendBufferedRowGroup
            virtual ColumnWriter *column(int i) = 0;

            /// \brief index of the column currently being written
            virtual int current_column() const = 0;

            /// \brief close the row group
            virtual void Close() = 0;

            /// \brief total uncompressed bytes written by the page writer
            virtual int64_t total_bytes_written() const = 0;

            /// \brief total bytes still compressed but not written by the page writer
            virtual int64_t total_compressed_bytes() const = 0;

            /// \brief total compressed bytes written by the page writer
            virtual int64_t total_compressed_bytes_written() const = 0;

            /// \brief whether this row group is in buffered mode
            virtual bool buffered() const = 0;
        };

        /// \brief Construct a writer backed by the given Contents implementation.
        /// \param[in] contents the implementation; ownership is transferred to
        /// this RowGroupWriter.
        explicit RowGroupWriter(std::unique_ptr<Contents> contents);

        /// Construct a ColumnWriter for the indicated row group-relative column.
        ///
        /// To be used only with ParquetFileWriter::AppendRowGroup
        /// Ownership is solely within the RowGroupWriter. The ColumnWriter is only
        /// valid until the next call to NextColumn or Close. As the contents are
        /// directly written to the sink, once a new column is started, the contents
        /// of the previous one cannot be modified anymore.
        ColumnWriter *NextColumn();

        /// Index of currently written column. Equal to -1 if NextColumn()
        /// has not been called yet.
        ///
        /// NOTE(review): unlike Contents::current_column(), this accessor is not
        /// const-qualified.
        int current_column();

        /// Close the row group. ColumnWriter pointers previously handed out by
        /// NextColumn() or column() are no longer valid afterwards; in buffered
        /// mode the buffered contents are written to the sink at this point.
        void Close();

        /// Number of columns in this row group.
        int num_columns() const;

        /// Construct a ColumnWriter for the indicated row group column.
        ///
        /// To be used only with ParquetFileWriter::AppendBufferedRowGroup
        /// Ownership is solely within the RowGroupWriter. The ColumnWriter is
        /// valid until Close. The contents are buffered in memory and written to sink
        /// on Close
        ///
        /// \param[in] i index of the column within the row group
        ColumnWriter *column(int i);

        /// Number of rows that shall be written as part of this RowGroup.
        int64_t num_rows() const;

        /// \brief total uncompressed bytes written by the page writer
        int64_t total_bytes_written() const;

        /// \brief total bytes still compressed but not written by the page writer.
        /// It will always return 0 from the SerializedPageWriter.
        int64_t total_compressed_bytes() const;

        /// \brief total compressed bytes written by the page writer
        int64_t total_compressed_bytes_written() const;

        /// Returns whether the current RowGroupWriter is in the buffered mode and is
        /// created by calling ParquetFileWriter::AppendBufferedRowGroup.
        bool buffered() const;

    private:
        // Holds a pointer to an instance of Contents implementation
        std::unique_ptr<Contents> contents_;
    };

    /// \brief Write `file_metadata` to `sink`.
    ///
    /// NOTE(review): judging by the name, this emits the metadata footer of a
    /// data file (as opposed to WriteMetaDataFile) -- confirm against the
    /// definition in the .cc file.
    ///
    /// \param[in] file_metadata the metadata to serialize
    /// \param[in] sink the output stream to write to; not owned
    PARQUET_EXPORT
    void WriteFileMetaData(
        const FileMetaData &file_metadata,
        ::arrow::io::OutputStream *sink);

    /// \brief Write `file_metadata` to `sink`.
    ///
    /// NOTE(review): the name suggests this produces a standalone,
    /// metadata-only file rather than the footer of a data file -- confirm
    /// against the definition in the .cc file.
    ///
    /// \param[in] file_metadata the metadata to serialize
    /// \param[in] sink the output stream to write to; not owned
    PARQUET_EXPORT
    void WriteMetaDataFile(
        const FileMetaData &file_metadata,
        ::arrow::io::OutputStream *sink);

    /// \brief Write `file_metadata` to `sink` using `encryptor`.
    ///
    /// \param[in] file_metadata the metadata to serialize
    /// \param[in] sink the output stream to write to; not owned
    /// \param[in] encryptor the encryptor to use
    /// \param[in] encrypt_footer NOTE(review): presumably selects whether the
    /// footer itself is encrypted -- confirm against the definition
    PARQUET_EXPORT
    void WriteEncryptedFileMetadata(
        const FileMetaData &file_metadata,
        ArrowOutputStream *sink,
        const std::shared_ptr<Encryptor> &encryptor,
        bool encrypt_footer);

    /// Same parameters as the declaration above, with defaults for `encryptor`
    /// (NULLPTR) and `encrypt_footer` (false).
    ///
    /// NOTE(review): if ArrowOutputStream is an alias of
    /// ::arrow::io::OutputStream (TODO confirm in platform.h), this is a
    /// redeclaration of the same function that only adds the default arguments,
    /// not a distinct overload.
    PARQUET_EXPORT
    void WriteEncryptedFileMetadata(
        const FileMetaData &file_metadata,
        ::arrow::io::OutputStream *sink,
        const std::shared_ptr<Encryptor> &encryptor = NULLPTR,
        bool encrypt_footer = false);

    /// \brief Write `crypto_metadata` to `sink`.
    ///
    /// \param[in] crypto_metadata the file crypto metadata to serialize
    /// \param[in] sink the output stream to write to; not owned
    PARQUET_EXPORT
    void WriteFileCryptoMetaData(
        const FileCryptoMetaData &crypto_metadata,
        ::arrow::io::OutputStream *sink);

    /// \brief Top-level writer for a Parquet file.
    ///
    /// Row groups are started with AppendRowGroup() or AppendBufferedRowGroup().
    /// The public methods mirror the Contents interface; a Contents
    /// implementation is owned through contents_.
    class PARQUET_EXPORT ParquetFileWriter {
    public:
        // Abstract interface 'Contents', declared here to aid dependency
        // injection and to make test fixtures easier to create. An implementation
        // of the Contents class is defined in the .cc file.
        struct Contents {
            /// \param[in] schema root group node used to initialize the schema
            /// descriptor
            /// \param[in] key_value_metadata file-level key-value metadata, stored
            /// in key_value_metadata_
            Contents(
                std::shared_ptr<schema::GroupNode> schema,
                std::shared_ptr<const KeyValueMetadata> key_value_metadata)
                // schema_ is value-initialized first, then populated via Init().
                : schema_(), key_value_metadata_(std::move(key_value_metadata)) {
                schema_.Init(std::move(schema));
            }

            virtual ~Contents() = default;

            // Perform any cleanup associated with the file contents
            virtual void Close() = 0;

            /// \note Deprecated since 1.3.0
            RowGroupWriter *AppendRowGroup(int64_t num_rows);

            /// \brief start a new row group (see ParquetFileWriter::AppendRowGroup)
            virtual RowGroupWriter *AppendRowGroup() = 0;

            /// \brief start a new buffered row group
            /// (see ParquetFileWriter::AppendBufferedRowGroup)
            virtual RowGroupWriter *AppendBufferedRowGroup() = 0;

            virtual int64_t num_rows() const = 0;

            virtual int num_columns() const = 0;

            virtual int num_row_groups() const = 0;

            virtual const std::shared_ptr<WriterProperties> &properties() const = 0;

            /// \brief the key-value metadata passed at construction
            const std::shared_ptr<const KeyValueMetadata> &key_value_metadata() const {
                return key_value_metadata_;
            }

            virtual void AddKeyValueMetadata(
                const std::shared_ptr<const KeyValueMetadata> &key_value_metadata) = 0;

            // Return const-pointer to make it clear that this object is not to be
            // copied
            const SchemaDescriptor *schema() const {
                return &schema_;
            }

            // Schema descriptor, initialized from the GroupNode given to the
            // constructor.
            SchemaDescriptor schema_;

            /// This should be the only place this is stored. Everything else is a const
            /// reference
            std::shared_ptr<const KeyValueMetadata> key_value_metadata_;

            /// \brief the file metadata held in file_metadata_
            const std::shared_ptr<FileMetaData> &metadata() const {
                return file_metadata_;
            }

            // Not set by the constructor above, so it starts out null.
            std::shared_ptr<FileMetaData> file_metadata_;
        };

        ParquetFileWriter();

        ~ParquetFileWriter();

        /// \brief Create a writer for `schema` that writes to `sink`.
        ///
        /// \param[in] sink the output stream to write to
        /// \param[in] schema root group node describing the file schema
        /// \param[in] properties writer configuration; defaults to
        /// default_writer_properties()
        /// \param[in] key_value_metadata file-level key-value metadata; defaults
        /// to NULLPTR
        /// \return the newly created writer, owned by the caller
        static std::unique_ptr<ParquetFileWriter> Open(
            std::shared_ptr<::arrow::io::OutputStream> sink,
            std::shared_ptr<schema::GroupNode> schema,
            std::shared_ptr<WriterProperties> properties =
                    default_writer_properties(),
            std::shared_ptr<const KeyValueMetadata> key_value_metadata = NULLPTR);

        /// \brief Open this writer on a caller-supplied Contents implementation.
        ///
        /// \param[in] contents the implementation; ownership is transferred to
        /// this call
        void Open(std::unique_ptr<Contents> contents);

        /// Close the file. RowGroupWriter pointers previously handed out by this
        /// writer are no longer valid afterwards, and the file metadata becomes
        /// available through metadata().
        void Close();

        /// Construct a RowGroupWriter for the indicated number of rows.
        ///
        /// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is
        /// only valid until the next call to AppendRowGroup or AppendBufferedRowGroup
        /// or Close.
        /// \param[in] num_rows The number of rows that are stored in the new RowGroup
        ///
        /// \deprecated Since 1.3.0
        RowGroupWriter *AppendRowGroup(int64_t num_rows);

        /// Construct a RowGroupWriter with an arbitrary number of rows.
        ///
        /// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is
        /// only valid until the next call to AppendRowGroup or AppendBufferedRowGroup
        /// or Close.
        RowGroupWriter *AppendRowGroup();

        /// Construct a RowGroupWriter that buffers all the values until the RowGroup
        /// is ready. Use this if you want to write a RowGroup based on a certain size.
        ///
        /// Ownership is solely within the ParquetFileWriter. The RowGroupWriter is
        /// only valid until the next call to AppendRowGroup or AppendBufferedRowGroup
        /// or Close.
        RowGroupWriter *AppendBufferedRowGroup();

        /// \brief Add key-value metadata to the file.
        /// \param[in] key_value_metadata the metadata to add.
        /// \note This will overwrite any existing metadata with the same key.
        /// \throw ParquetException if Close() has been called.
        void AddKeyValueMetadata(
            const std::shared_ptr<const KeyValueMetadata> &key_value_metadata);

        /// Number of columns.
        ///
        /// This number is fixed during the lifetime of the writer as it is determined
        /// via the schema.
        int num_columns() const;

        /// Number of rows in the RowGroups started so far.
        ///
        /// Changes on the addition of a new RowGroup.
        int64_t num_rows() const;

        /// Number of started RowGroups.
        int num_row_groups() const;

        /// Configuration passed to the writer, e.g. the used Parquet format version.
        const std::shared_ptr<WriterProperties> &properties() const;

        /// Returns the file schema descriptor
        const SchemaDescriptor *schema() const;

        /// Returns a column descriptor in schema
        /// \param[in] i index of the column
        const ColumnDescriptor *descr(int i) const;

        /// Returns the file custom metadata
        const std::shared_ptr<const KeyValueMetadata> &key_value_metadata() const;

        /// Returns the file metadata, only available after calling Close().
        ///
        /// NOTE(review): returns the shared_ptr by (const) value, whereas
        /// Contents::metadata() returns a const reference.
        const std::shared_ptr<FileMetaData> metadata() const;

    private:
        // Holds a pointer to an instance of Contents implementation
        std::unique_ptr<Contents> contents_;
        // File metadata kept on the writer itself, in addition to
        // Contents::file_metadata_.
        // NOTE(review): presumably populated on Close() and returned by
        // metadata() -- confirm in the .cc file.
        std::shared_ptr<FileMetaData> file_metadata_;
    };
} // namespace kumo::pollux::parquet::arrow
